Licenses and Inspections

Exploratory Data Analysis

Authors

Angel Rutherford

Ixchel Ramirez

Tess Vu

Published

December 1, 2025

Potential Project?

Research Question: Can we identify tipping point neighborhoods where eviction filings will surge next month to help with preemptive rental assistance distribution?

Method: Negative Binomial regression with rolling window cross-validation using monthly tract-level panel data. This could be an issue bc the data is quite zero inflated.

Data Scope: January 2020 - November 2025 eviction filings across Philadelphia census tracts with weekly and monthly granularity.

Key Stuff:

  • Extreme zero-inflation (60-70% zeros) is p bad for Negative Binomial …
  • Strong seasonality with summer-fall peaks requiring month fixed effects.
  • Moratorium policy created structural break from March 2020 through September 2021.
  • Racial majority strongly predicts filing intensity suggesting equity concerns.
  • High-frequency weekly data captures volatility but monthly data better for forecasting.

Setup and Initial Data Loading

Code
# Load libraries.
library(tidyverse)
library(lubridate)
library(sf)
library(scales)
library(patchwork)
library(viridis)
library(kableExtra)
library(corrplot)
library(ggridges)
library(forecast)
library(zoo)
library(knitr)

# Disable scientific notation.
options(scipen = 999)

# Consistent theme.
theme_set(theme_minimal(base_size = 12))

# Download viz when knitting.
opts_chunk$set(
  fig.path = "eviction_figures/", 
  dev = "png"
)

Monthly Tract-Level Data

Code
# Load monthly eviction filings data at census tract level.
df_monthly_raw <- read.csv("data/eviction/philadelphia_monthly_2020_2021.csv")

# Initial data structure and dimensions.
print(dim(df_monthly_raw))
[1] 29039     8
Code
print(names(df_monthly_raw))
[1] "type"                             "GEOID"                           
[3] "racial_majority"                  "month"                           
[5] "filings_2020"                     "filings_avg"                     
[7] "filings_avg_prepandemic_baseline" "last_updated"                    
Code
# First few rows to understand structure.
head(df_monthly_raw, 10) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
type GEOID racial_majority month filings_2020 filings_avg filings_avg_prepandemic_baseline last_updated
Census Tract 42101000101 White 01/2020 0 2.5 0.75
Census Tract 42101000101 White 02/2020 0 1.5 1.00
Census Tract 42101000101 White 03/2020 0 1.5 1.00
Census Tract 42101000101 White 04/2020 0 1.0 1.75
Census Tract 42101000101 White 05/2020 0 2.0 1.75
Census Tract 42101000101 White 06/2020 0 1.0 2.00
Census Tract 42101000101 White 07/2020 0 2.5 0.50
Census Tract 42101000101 White 08/2020 1 1.5 0.75
Census Tract 42101000101 White 09/2020 2 0.0 3.50
Census Tract 42101000101 White 10/2020 2 1.5 1.50

Weekly Tract-Level Data

Code
# Load weekly eviction filings data for high-frequency analysis.
df_weekly_raw <- read.csv("data/eviction/philadelphia_weekly_2020_2021.csv")

# Weekly data structure.
print(dim(df_weekly_raw))
[1] 125563      9
Code
head(df_weekly_raw, 10) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
type GEOID racial_majority week week_date filings_2020 filings_avg filings_avg_prepandemic_baseline last_updated
Census Tract 42101000101 White 1 2019-12-29 0 0.5 0.00 2025-11-15
Census Tract 42101000101 White 2 2020-01-05 0 0.5 0.25 2025-11-15
Census Tract 42101000101 White 3 2020-01-12 0 0.5 0.25 2025-11-15
Census Tract 42101000101 White 4 2020-01-19 0 1.0 0.00 2025-11-15
Census Tract 42101000101 White 5 2020-01-26 0 0.0 0.25 2025-11-15
Census Tract 42101000101 White 6 2020-02-02 0 0.0 0.25 2025-11-15
Census Tract 42101000101 White 7 2020-02-09 0 0.5 0.50 2025-11-15
Census Tract 42101000101 White 8 2020-02-16 0 0.0 0.00 2025-11-15
Census Tract 42101000101 White 9 2020-02-23 0 1.0 0.25 2025-11-15
Census Tract 42101000101 White 10 2020-03-01 0 0.0 0.25 2025-11-15

Claims Data

Code
# Load aggregated monthly claims data showing financial severity.
df_claims_raw <- read.csv("data/eviction/philadelphia_claims_monthly.csv")

# Claims data structure.
print(dim(df_claims_raw))
[1] 70  6
Code
head(df_claims_raw, 10) %>%
  kable(digits = 2) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
month_date median_claim sub1000 sub_med_rent over_six_months_rent median_claim_baseline
2020-01-01 1958.00 0.03 0.06 0.14 4200
2020-02-01 1944.00 0.03 0.06 0.14 4200
2020-03-01 2050.00 0.03 0.06 0.14 4200
2020-04-01 NA NA NA NA 4200
2020-05-01 NA NA NA NA 4200
2020-06-01 NA NA NA NA 4200
2020-07-01 3975.00 0.03 0.06 0.14 4200
2020-08-01 3285.31 0.03 0.06 0.14 4200
2020-09-01 3639.32 0.03 0.06 0.14 4200
2020-10-01 3422.50 0.03 0.06 0.14 4200

Data Cleaning and Feature Engineering

Monthly Data Preparation

Code
# Clean and prepare monthly data with temporal features.
df_monthly <- df_monthly_raw %>%
  # Rename filings variable for clarity.
  rename(filings_count = filings_2020) %>%
  # Parse month string to proper date format.
  mutate(
    date = as.Date(paste0("01/", month), format = "%d/%m/%Y"),
    year = year(date),
    month_num = month(date),
    month_name = month(date, label = TRUE, abbr = FALSE)
  ) %>%
  # Filter out rows with invalid dates.
  filter(!is.na(date)) %>%
  # Convert GEOID to character for joining operations.
  mutate(GEOID = as.character(GEOID)) %>%
  # Create policy intervention indicator for eviction moratorium.
  mutate(
    Moratorium_Active = ifelse(
      date >= as.Date("2020-03-01") & date <= as.Date("2021-09-30"),
      1,
      0
    ),
    Period = case_when(
      date < as.Date("2020-03-01") ~ "Pre-Moratorium",
      date >= as.Date("2020-03-01") & date <= as.Date("2021-09-30") ~ "Moratorium",
      date > as.Date("2021-09-30") ~ "Post-Moratorium"
    ),
    Period = factor(Period, levels = c("Pre-Moratorium", "Moratorium", "Post-Moratorium"))
  ) %>%
  # Create seasonal indicators for seasonality analysis.
  mutate(
    Season = case_when(
      month_num %in% c(12, 1, 2) ~ "Winter",
      month_num %in% c(3, 4, 5) ~ "Spring",
      month_num %in% c(6, 7, 8) ~ "Summer",
      month_num %in% c(9, 10, 11) ~ "Fall"
    ),
    Season = factor(Season, levels = c("Winter", "Spring", "Summer", "Fall"))
  )

# Print summary statistics of cleaned monthly data.
summary(df_monthly %>% select(filings_count, date, year, Moratorium_Active)) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
filings_count date year Moratorium_Active
Min. : 0.000 Min. :2020-01-01 Min. :2020 Min. :0.0000
1st Qu.: 0.000 1st Qu.:2021-06-01 1st Qu.:2021 1st Qu.:0.0000
Median : 1.000 Median :2022-12-01 Median :2022 Median :0.0000
Mean : 2.286 Mean :2022-12-01 Mean :2022 Mean :0.2676
3rd Qu.: 3.000 3rd Qu.:2024-06-01 3rd Qu.:2024 3rd Qu.:1.0000
Max. :694.000 Max. :2025-11-01 Max. :2025 Max. :1.0000
Code
# Check temporal coverage.
df_monthly %>%
  summarize(
    Min_Date = min(date),
    Max_Date = max(date),
    N_Months = n_distinct(date),
    N_Tracts = n_distinct(GEOID),
    Total_Obs = n()
  ) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Min_Date Max_Date N_Months N_Tracts Total_Obs
2020-01-01 2025-11-01 71 409 29039

Weekly Data Preparation

Code
# Clean and prepare weekly data for high-frequency analysis.
df_weekly <- df_weekly_raw %>%
  # Rename filings variable.
  rename(filings_count = filings_2020) %>%
  # Convert week_date to proper date format.
  mutate(
    date = as.Date(week_date),
    year = year(date),
    month_num = month(date),
    week_of_year = week(date)
  ) %>%
  # Filter out rows with invalid dates.
  filter(!is.na(date)) %>%
  # Convert GEOID to character.
  mutate(GEOID = as.character(GEOID)) %>%
  # Create moratorium indicator.
  mutate(
    Moratorium_Active = ifelse(
      date >= as.Date("2020-03-01") & date <= as.Date("2021-09-30"),
      1,
      0
    )
  )

# Check weekly temporal coverage.
df_weekly %>%
  summarize(
    Min_Date = min(date),
    Max_Date = max(date),
    N_Weeks = n_distinct(date),
    N_Tracts = n_distinct(GEOID),
    Total_Obs = n()
  ) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Min_Date Max_Date N_Weeks N_Tracts Total_Obs
2019-12-29 2025-11-09 307 409 125563

Claims Data Preparation

Code
# Clean claims data showing financial severity of evictions.
df_claims <- df_claims_raw %>%
  # Convert month_date to proper date format.
  mutate(
    date = as.Date(month_date),
    year = year(date),
    month_num = month(date)
  ) %>%
  # Filter valid dates.
  filter(!is.na(date)) %>%
  # Calculate ratio to baseline.
  mutate(claim_ratio = median_claim / median_claim_baseline)

summary(df_claims %>% select(median_claim, median_claim_baseline, claim_ratio)) %>%
  kable(digits = 2) %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
median_claim median_claim_baseline claim_ratio
Min. :1944 Min. :4200 Min. :0.4629
1st Qu.:4088 1st Qu.:4200 1st Qu.:0.9733
Median :4236 Median :4200 Median :1.0085
Mean :4346 Mean :4200 Mean :1.0348
3rd Qu.:4724 3rd Qu.:4200 3rd Qu.:1.1247
Max. :6128 Max. :4200 Max. :1.4591
NA's :3 NA NA's :3

Distribution Analysis

Monthly Filing Distribution

Code
# Calculate zero-inflation statistics for monthly data.
zero_stats_monthly <- df_monthly %>%
  summarize(
    Total_Obs = n(),
    Zero_Count = sum(filings_count == 0),
    Zero_Pct = Zero_Count / Total_Obs * 100,
    Positive_Count = sum(filings_count > 0),
    Mean_All = mean(filings_count),
    Mean_Positive = mean(filings_count[filings_count > 0]),
    Median_All = median(filings_count),
    Variance = var(filings_count),
    Dispersion_Ratio = Variance / Mean_All
  )

cat("Zero-Inflation Statistics (Monthly Data):\n")
Zero-Inflation Statistics (Monthly Data):
Code
cat(sprintf("Total Observations: %s\n", comma(zero_stats_monthly$Total_Obs)))
Total Observations: 29,039
Code
cat(sprintf("Zero Filings: %s (%.1f%%)\n", 
            comma(zero_stats_monthly$Zero_Count), 
            zero_stats_monthly$Zero_Pct))
Zero Filings: 10,752 (37.0%)
Code
cat(sprintf("Positive Filings: %s (%.1f%%)\n", 
            comma(zero_stats_monthly$Positive_Count), 
            100 - zero_stats_monthly$Zero_Pct))
Positive Filings: 18,287 (63.0%)
Code
cat(sprintf("\nMean (all): %.2f\n", zero_stats_monthly$Mean_All))

Mean (all): 2.29
Code
cat(sprintf("Mean (positive only): %.2f\n", zero_stats_monthly$Mean_Positive))
Mean (positive only): 3.63
Code
cat(sprintf("Variance: %.2f\n", zero_stats_monthly$Variance))
Variance: 28.75
Code
cat(sprintf("Dispersion Ratio: %.2f\n", zero_stats_monthly$Dispersion_Ratio))
Dispersion Ratio: 12.58
Code
# Create histogram data for raw and log-transformed counts.
df_hist_monthly <- df_monthly %>%
  mutate(
    Log_Count = log10(filings_count + 1),
    Scale = factor("Raw Count", levels = c("Raw Count", "Log10(Count + 1)"))
  )

# Duplicate data for log-transformed panel.
df_log_monthly <- df_hist_monthly %>%
  mutate(
    filings_count = Log_Count,
    Scale = factor("Log10(Count + 1)", levels = c("Raw Count", "Log10(Count + 1)"))
  )

High zero percentage could use zero-inflated Negative Binomial model, but I think we’re restricted to just regular NB, lmk if I’m wrong. Dispersion ratio > 1 justifies Negative Binomial over Poisson.

Code
# Combine for faceted viz.
df_final_hist_monthly <- bind_rows(df_hist_monthly, df_log_monthly)

# Create 1x2 faceted histogram showing raw and log distributions.
dist_plot_monthly <- ggplot(df_final_hist_monthly, aes(x = filings_count)) +
  geom_histogram(bins = 50, fill = "#E74C3C", color = "white", alpha = 0.8) +
  scale_x_continuous(labels = comma) +
  scale_y_continuous(labels = comma) +
  labs(
    title = "Distribution of Monthly Eviction Filings per Census Tract",
    subtitle = sprintf("%.1f%% zeros justify Zero-Inflated Negative Binomial (ZINB) model.\nNot sure if in project toolbox.", 
                       zero_stats_monthly$Zero_Pct),
    x = "Filings Count",
    y = "Frequency (Number of Observations)",
    caption = "Data: Philadelphia Eviction Filings 2020-2025 | Monthly tract-level observations"
  ) +
  facet_wrap(~ Scale, scales = "free", ncol = 2) +
  theme_minimal(base_size = 12) +
  theme(
    plot.title = element_text(face = "bold", size = 14),
    strip.text = element_text(face = "bold")
  )

# Display distribution plot demonstrating zero-inflation.
dist_plot_monthly

Weekly Filing Distribution

Code
# Calculate zero-inflation statistics for weekly data.
zero_stats_weekly <- df_weekly %>%
  summarize(
    Total_Obs = n(),
    Zero_Count = sum(filings_count == 0),
    Zero_Pct = Zero_Count / Total_Obs * 100,
    Mean_All = mean(filings_count),
    Variance = var(filings_count),
    Dispersion_Ratio = Variance / Mean_All
  )

cat("\nZero-Inflation Statistics (Weekly Data):\n")

Zero-Inflation Statistics (Weekly Data):
Code
cat(sprintf("Total Observations: %s\n", comma(zero_stats_weekly$Total_Obs)))
Total Observations: 125,563
Code
cat(sprintf("Zero Filings: %s (%.1f%%)\n", 
            comma(zero_stats_weekly$Zero_Count), 
            zero_stats_weekly$Zero_Pct))
Zero Filings: 88,027 (70.1%)
Code
cat(sprintf("Mean: %.2f\n", zero_stats_weekly$Mean_All))
Mean: 0.53
Code
cat(sprintf("Dispersion Ratio: %.2f\n", zero_stats_weekly$Dispersion_Ratio))
Dispersion Ratio: 5.62
Code
# Create histogram for weekly distribution.
weekly_hist <- df_weekly %>%
  filter(filings_count > 0) %>%
  ggplot(aes(x = filings_count)) +
  geom_histogram(bins = 30, fill = "#3498DB", color = "white", alpha = 0.8) +
  scale_x_continuous(labels = comma) +
  scale_y_continuous(labels = comma) +
  labs(
    title = "Distribution of Weekly Eviction Filings (Positive Counts Only)",
    subtitle = sprintf("%.1f%% of observations are zeros (not shown)", zero_stats_weekly$Zero_Pct),
    x = "Weekly Filings Count",
    y = "Frequency",
    caption = "Data: Philadelphia Weekly Eviction Filings | Excludes zero observations"
  ) +
  theme_minimal(base_size = 12) +
  theme(plot.title = element_text(face = "bold", size = 14))

weekly_hist

Quantile Analysis

Code
# Calculate quantiles for positive filings only.
quantile_stats_monthly <- df_monthly %>%
  filter(filings_count > 0) %>%
  summarize(
    Min = min(filings_count),
    Q01 = quantile(filings_count, 0.01),
    Q05 = quantile(filings_count, 0.05),
    Q25 = quantile(filings_count, 0.25),
    Median = median(filings_count),
    Q75 = quantile(filings_count, 0.75),
    Q95 = quantile(filings_count, 0.95),
    Q99 = quantile(filings_count, 0.99),
    Max = max(filings_count)
  )

# Monthly filing count distribution quantiles (positive counts only).
quantile_stats_monthly %>%
  pivot_longer(everything(), names_to = "Quantile", values_to = "Filings") %>%
  kable(digits = 1) %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Quantile Filings
Min 1
Q01 1
Q05 1
Q25 1
Median 3
Q75 5
Q95 10
Q99 16
Max 694

Temporal Analysis

Monthly Aggregate Trend with Policy Markers

Code
# Calculate total monthly filings across all tracts.
monthly_aggregate <- df_monthly %>%
  group_by(date, Period) %>%
  summarize(
    Total_Filings = sum(filings_count, na.rm = TRUE),
    Mean_Filings = mean(filings_count, na.rm = TRUE),
    Median_Filings = median(filings_count, na.rm = TRUE),
    N_Tracts = n_distinct(GEOID),
    .groups = "drop"
  )

# Create line plot with policy intervention markers.
monthly_trend_plot <- ggplot(monthly_aggregate, aes(x = date, y = Total_Filings)) +
  geom_line(color = "#C0392B", linewidth = 1.3) +
  geom_point(aes(color = Period), size = 3) +
  # Mark start of moratorium.
  geom_vline(xintercept = as.Date("2020-03-01"), 
             linetype = "dashed", color = "#27AE60", linewidth = 1) +
  annotate("text", x = as.Date("2020-03-01"), y = max(monthly_aggregate$Total_Filings) * 0.95,
           label = "Moratorium\nStarts", hjust = -0.1, color = "#27AE60", size = 4, fontface = "bold") +
  # Mark end of moratorium.
  geom_vline(xintercept = as.Date("2021-09-30"), 
             linetype = "dashed", color = "#E67E22", linewidth = 1) +
  annotate("text", x = as.Date("2021-09-30"), y = max(monthly_aggregate$Total_Filings) * 0.85,
           label = "Moratorium\nEnds", hjust = 1.1, color = "#E67E22", size = 4, fontface = "bold") +
  scale_y_continuous(labels = comma) +
  scale_color_manual(values = c("Pre-Moratorium" = "#3498DB", 
                                 "Moratorium" = "#27AE60", 
                                 "Post-Moratorium" = "#E67E22")) +
  labs(
    title = "Total Monthly Eviction Filings: System-Wide Trend (2020-2025)",
    subtitle = "Dramatic suppression during moratorium followed by surge after policy ended",
    x = "Date (Month)",
    y = "Total Monthly Filings Across All Census Tracts",
    color = "Policy Period",
    caption = "Data: Philadelphia Eviction Filings | Vertical lines mark policy changes"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    plot.title = element_text(face = "bold", size = 14),
    legend.position = "bottom"
  )

# Display system-wide trend with policy markers.
monthly_trend_plot

Code
# Print numeric statistics by period.
# Filings by policy period.
monthly_aggregate %>%
  group_by(date >= as.Date("2020-03-01") & date <= as.Date("2021-09-30")) %>%
  summarize(
    Period = ifelse(unique(`date >= as.Date("2020-03-01") & date <= as.Date("2021-09-30")`), 
                    "Moratorium", "Non-Moratorium"),
    Mean_Monthly = mean(Total_Filings),
    Median_Monthly = median(Total_Filings),
    Min_Monthly = min(Total_Filings),
    Max_Monthly = max(Total_Filings),
    .groups = "drop"
  ) %>%
  select(-1) %>%
  kable(digits = 0) %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Period Mean_Monthly Median_Monthly Min_Monthly Max_Monthly
Non-Moratorium 1126 1104 246 2015
Moratorium 412 494 0 813

Weekly Aggregate Trend

Code
# Calculate total weekly filings for volatility assessment.
weekly_aggregate <- df_weekly %>%
  group_by(date) %>%
  summarize(
    Total_Filings = sum(filings_count, na.rm = TRUE),
    .groups = "drop"
  )

# Create high-frequency weekly trend plot.
weekly_trend_plot <- ggplot(weekly_aggregate, aes(x = date, y = Total_Filings)) +
  geom_line(color = "#8E44AD", alpha = 0.6, linewidth = 0.8) +
  geom_smooth(method = "loess", se = TRUE, color = "#E74C3C", linewidth = 1.2, span = 0.1) +
  geom_vline(xintercept = as.Date("2020-03-01"), 
             linetype = "dashed", color = "#27AE60", alpha = 0.7) +
  geom_vline(xintercept = as.Date("2021-09-30"), 
             linetype = "dashed", color = "#E67E22", alpha = 0.7) +
  scale_y_continuous(labels = comma) +
  labs(
    title = "Weekly Eviction Filings: High-Frequency Volatility (2020-2025)",
    subtitle = "Smoothed trend (red) shows underlying pattern amid weekly fluctuations",
    x = "Date (Week Ending)",
    y = "Total Weekly Filings Across All Census Tracts",
    caption = "Data: Philadelphia Weekly Eviction Filings | Vertical Lines = Policy Changes"
  ) +
  theme_minimal(base_size = 12) +
  theme(plot.title = element_text(face = "bold", size = 14))

weekly_trend_plot

Seasonality Analysis

Code
# Calculate average filings by month of year for seasonality pattern.
seasonality_data <- df_monthly %>%
  filter(Period != "Moratorium") %>%
  group_by(month_num, month_name) %>%
  summarize(
    Mean_Filings = mean(filings_count, na.rm = TRUE),
    Median_Filings = median(filings_count, na.rm = TRUE),
    SD_Filings = sd(filings_count, na.rm = TRUE),
    N_Obs = n(),
    .groups = "drop"
  )

# Create seasonal pattern plot.
seasonality_plot <- ggplot(seasonality_data, aes(x = month_num, y = Mean_Filings)) +
  geom_line(color = "#16A085", linewidth = 1.3) +
  geom_point(color = "#16A085", size = 4) +
  geom_ribbon(aes(ymin = Mean_Filings - SD_Filings, 
                  ymax = Mean_Filings + SD_Filings),
              alpha = 0.2, fill = "#16A085") +
  scale_x_continuous(breaks = 1:12, labels = month.abb) +
  labs(
    title = "Seasonal Pattern of Eviction Filings (Non-Moratorium Periods)",
    subtitle = "Summer-Fall peak suggests back-to-school timing effect | Shaded area = ± 1 SD",
    x = "Month of Year",
    y = "Mean Filings per Tract",
    caption = "Data: Philadelphia Monthly Eviction Filings | Excludes moratorium period"
  ) +
  theme_minimal(base_size = 12) +
  theme(plot.title = element_text(face = "bold", size = 14))

seasonality_plot

Code
# Print seasonal statistics.
# Seasonal pattern statistics
seasonality_data %>%
  select(month_name, Mean_Filings, Median_Filings, SD_Filings) %>%
  kable(digits = 2, col.names = c("Month", "Mean", "Median", "SD")) %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Month Mean Median SD
January 3.37 2 4.05
February 2.80 1 4.05
March 2.68 2 3.09
April 2.24 1 2.77
May 2.69 2 3.26
June 2.77 2 3.98
July 2.77 2 3.28
August 3.07 1 17.56
September 2.90 2 6.51
October 2.74 2 3.25
November 2.16 1 3.13
December 2.82 2 3.34

Claims Severity Over Time

Code
# Median claim amount trend showing financial severity.
claims_plot <- ggplot(df_claims, aes(x = date, y = median_claim)) +
  geom_line(color = "#27AE60", linewidth = 1.2) +
  geom_point(color = "#27AE60", size = 2.5) +
  geom_hline(aes(yintercept = median_claim_baseline), 
             linetype = "dashed", color = "#E74C3C", linewidth = 1) +
  annotate("text", x = min(df_claims$date), y = df_claims$median_claim_baseline[1] * 1.05,
           label = sprintf("Pre-Pandemic Baseline: $%s", 
                          comma(df_claims$median_claim_baseline[1])),
           hjust = 0, color = "#E74C3C", fontface = "bold") +
  geom_vline(xintercept = as.Date("2020-03-01"), 
             linetype = "dotted", color = "gray50", alpha = 0.7) +
  geom_vline(xintercept = as.Date("2021-09-30"), 
             linetype = "dotted", color = "gray50", alpha = 0.7) +
  scale_y_continuous(labels = dollar) +
  labs(
    title = "Median Eviction Claim Amount Over Time",
    subtitle = "Claims increased during moratorium as arrears accumulated then stabilized",
    x = "Date (Month)",
    y = "Median Claim Amount (USD)",
    caption = "Data: Philadelphia Eviction Claims | Dashed line = pre-pandemic baseline"
  ) +
  theme_minimal(base_size = 12) +
  theme(plot.title = element_text(face = "bold", size = 14))

claims_plot

Code
# Calculate claim statistics by period.
df_claims %>%
  mutate(Period = case_when(
    date < as.Date("2020-03-01") ~ "Pre-Moratorium",
    date >= as.Date("2020-03-01") & date <= as.Date("2021-09-30") ~ "Moratorium",
    date > as.Date("2021-09-30") ~ "Post-Moratorium"
  )) %>%
  group_by(Period) %>%
  summarize(
    Mean_Claim = mean(median_claim, na.rm = TRUE),
    Median_Claim = median(median_claim, na.rm = TRUE),
    Ratio_to_Baseline = mean(claim_ratio, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  kable(digits = 2) %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Period Mean_Claim Median_Claim Ratio_to_Baseline
Moratorium 4243.75 4059.83 1.01
Post-Moratorium 4477.26 4299.73 1.07
Pre-Moratorium 1951.00 1951.00 0.46

Spatial and Racial Disparity Analysis

Filings by Racial Majority

Code
# Calculate total and mean filings by racial majority category.
racial_summary <- df_monthly %>%
  filter(!is.na(racial_majority), racial_majority != "") %>%
  group_by(racial_majority) %>%
  summarize(
    Total_Filings = sum(filings_count, na.rm = TRUE),
    Mean_Filings = mean(filings_count, na.rm = TRUE),
    Median_Filings = median(filings_count, na.rm = TRUE),
    N_Tract_Months = n(),
    N_Unique_Tracts = n_distinct(GEOID),
    .groups = "drop"
  ) %>%
  arrange(desc(Total_Filings))

# Create bar chart showing total filings by racial majority.
racial_plot <- ggplot(racial_summary, aes(x = reorder(racial_majority, Total_Filings), 
                                           y = Total_Filings)) +
  geom_col(aes(fill = Total_Filings), width = 0.7) +
  coord_flip() +
  scale_fill_gradient(low = "#BDC3C7", high = "#E74C3C", labels = comma) +
  scale_y_continuous(labels = comma) +
  labs(
    title = "Total Eviction Filings by Census Tract Racial Majority",
    subtitle = "Stark disparities indicate structural inequities requiring policy attention",
    x = "Census Tract Racial Majority",
    y = "Total Eviction Filings (2020-2025)",
    fill = "Total\nFilings",
    caption = "Data: Philadelphia Monthly Eviction Filings"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    plot.title = element_text(face = "bold", size = 14),
    legend.position = "right"
  )

racial_plot

Code
# Print detailed racial disparity statistics.
racial_summary %>%
  kable(digits = 1) %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
racial_majority Total_Filings Mean_Filings Median_Filings N_Tract_Months N_Unique_Tracts
Black 33875 3.3 2 10366 146
White 14450 1.4 1 10437 147
Other 13201 2.0 1 6532 92
Hispanic 3805 2.3 2 1633 23

Weekly Filings by Racial Majority (Stacked Area)

Code
# Calculate weekly totals by racial majority.
weekly_racial <- df_weekly %>%
  filter(!is.na(racial_majority), racial_majority != "") %>%
  group_by(date, racial_majority) %>%
  summarize(Total_Filings = sum(filings_count, na.rm = TRUE), .groups = "drop")

# Create stacked area chart showing composition over time.
weekly_racial_plot <- ggplot(weekly_racial, 
                             aes(x = date, y = Total_Filings, fill = racial_majority)) +
  geom_area(alpha = 0.8, color = "white", linewidth = 0.3) +
  scale_y_continuous(labels = comma) +
  scale_fill_viridis_d(option = "turbo", end = 0.9) +
  labs(
    title = "Weekly Eviction Filings by Racial Majority: Stacked Composition",
    subtitle = "Proportional burden shifts over time revealing differential vulnerability",
    x = "Date (Week Ending)",
    y = "Total Weekly Filings",
    fill = "Racial Majority",
    caption = "Data: Philadelphia Weekly Eviction Filings"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    plot.title = element_text(face = "bold", size = 14),
    legend.position = "bottom"
  )

weekly_racial_plot

Tract-Level Heterogeneity

High-Risk Tract Identification

Code
# Calculate total filings per tract across entire period.
tract_totals <- df_monthly %>%
  group_by(GEOID, racial_majority) %>%
  summarize(
    Total_Filings = sum(filings_count, na.rm = TRUE),
    Mean_Monthly = mean(filings_count, na.rm = TRUE),
    Median_Monthly = median(filings_count, na.rm = TRUE),
    SD_Monthly = sd(filings_count, na.rm = TRUE),
    CV = SD_Monthly / Mean_Monthly,
    Months_Observed = n(),
    Months_With_Filings = sum(filings_count > 0),
    .groups = "drop"
  )

# Identify top 20 highest-risk tracts.
top_risk_tracts <- tract_totals %>%
  arrange(desc(Total_Filings)) %>%
  head(20)

# Top 20 highest-risk census tracts (Total Filings 2020-2025).
top_risk_tracts %>%
  select(GEOID, racial_majority, Total_Filings, Mean_Monthly, CV) %>%
  kable(digits = 2) %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
GEOID racial_majority Total_Filings Mean_Monthly CV
sealed 1043 14.69 5.85
42101036100 White 831 11.70 1.17
42101021800 White 827 11.65 0.79
42101030100 Other 713 10.04 0.65
42101026800 Black 608 8.56 0.68
42101027200 Black 590 8.31 1.55
42101024000 Black 579 8.15 0.74
42101025200 Black 548 7.72 0.67
42101017800 Other 545 7.68 0.55
42101027600 Black 518 7.30 0.65
42101030000 Black 508 7.15 0.58
42101023800 Black 491 6.92 0.79
42101015200 Black 487 6.86 0.70
42101020101 Black 471 6.63 0.74
42101034600 Other 471 6.63 0.88
42101008302 Black 455 6.41 0.69
42101035302 White 455 6.41 0.85
42101024300 Black 444 6.25 0.84
42101023900 Black 441 6.21 0.72
42101012201 Black 428 6.03 0.76
Code
# Calculate concentration statistics.
total_system_filings <- sum(tract_totals$Total_Filings)
top_20_pct <- sum(top_risk_tracts$Total_Filings) / total_system_filings * 100

cat(sprintf("Concentration: Top 20 tracts account for %.1f%% of all filings\n", top_20_pct))
Concentration: Top 20 tracts account for 17.3% of all filings

Pre-Pandemic Baseline Comparison

Weekly Filings vs Baseline Ratio

Code
# Calculate system-wide ratio to pre-pandemic baseline.
weekly_baseline_ratio <- df_weekly %>%
  group_by(date) %>%
  summarize(
    Total_Filings = sum(filings_count, na.rm = TRUE),
    Total_Baseline = sum(filings_avg_prepandemic_baseline, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  filter(Total_Baseline > 0) %>%
  mutate(Ratio_to_Baseline = Total_Filings / Total_Baseline)

# Create line plot showing normalized recovery.
baseline_ratio_plot <- ggplot(weekly_baseline_ratio, aes(x = date, y = Ratio_to_Baseline)) +
  geom_line(color = "#16A085", linewidth = 0.8, alpha = 0.6) +
  geom_smooth(method = "loess", se = TRUE, color = "#E74C3C", linewidth = 1.2, span = 0.15) +
  geom_hline(yintercept = 1, linetype = "dashed", color = "black", linewidth = 1) +
  annotate("text", x = min(weekly_baseline_ratio$date), y = 1.05,
           label = "Pre-Pandemic Average", hjust = 0, fontface = "bold") +
  geom_vline(xintercept = as.Date("2020-03-01"), 
             linetype = "dotted", color = "gray50", alpha = 0.7) +
  geom_vline(xintercept = as.Date("2021-09-30"), 
             linetype = "dotted", color = "gray50", alpha = 0.7) +
  labs(
    title = "Weekly Filings Normalized to Pre-Pandemic Baseline",
    subtitle = "Ratio > 1 indicates system stress relative to historical norm | Smoothed trend in red",
    x = "Date (Week Ending)",
    y = "Weekly Filings / Pre-Pandemic Average Ratio",
    caption = "Data: Philadelphia Weekly Eviction Filings"
  ) +
  theme_minimal(base_size = 12) +
  theme(plot.title = element_text(face = "bold", size = 14))

baseline_ratio_plot

Code
# Calculate summary statistics by period.
weekly_baseline_ratio %>%
  mutate(Period = case_when(
    date < as.Date("2020-03-01") ~ "Pre-Moratorium",
    date >= as.Date("2020-03-01") & date <= as.Date("2021-09-30") ~ "Moratorium",
    date > as.Date("2021-09-30") ~ "Post-Moratorium"
  )) %>%
  group_by(Period) %>%
  summarize(
    Mean_Ratio = mean(Ratio_to_Baseline, na.rm = TRUE),
    Median_Ratio = median(Ratio_to_Baseline, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  kable(digits = 2) %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Period Mean_Ratio Median_Ratio
Moratorium 0.23 0.22
Post-Moratorium 0.64 0.63
Pre-Moratorium 1.13 1.12

Spatial Analysis

Code
# Tracts data.
tract_geo <- st_read("data/pa_tracts/pa_tracts.shp") %>%
  filter(COUNTYFP == "101")
Reading layer `pa_tracts' from data source 
  `C:\Users\Tess\Desktop\UPenn\UPenn_FW25\MUSA_5080-401_Public_Policy_Analytics\shark-tank\data\pa_tracts\pa_tracts.shp' 
  using driver `ESRI Shapefile'
Simple feature collection with 3446 features and 12 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -80.51985 ymin: 39.7198 xmax: -74.68956 ymax: 42.51607
Geodetic CRS:  NAD83
Code
# Calculate total filings per tract across the entire study period.
df_monthly_agg <- df_monthly %>%
  group_by(GEOID) %>%
  summarise(
    Total_Filings = sum(filings_count, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(GEOID = as.character(GEOID))

# Perform spatial join for long-term risk.
tract_map_monthly <- tract_geo %>%
  left_join(df_monthly_agg, by = "GEOID")
Code
# Calculate the average weekly filings per tract across the study period.
df_weekly_agg <- df_weekly %>%
  group_by(GEOID) %>%
  summarise(
    Avg_Weekly_Filings = mean(filings_count, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(GEOID = as.character(GEOID))

# Average weekly filings per tract calculated for short-term risk map.
tract_map_weekly <- tract_geo %>%
  left_join(df_weekly_agg, by = "GEOID")

Choropleths

Code
# Define a common color scale for visual consistency.
common_fill_scale <- scale_fill_viridis_c(
  trans = "log10",
  labels = comma,
  na.value = "gray70",
  direction = -1,
  name = "Count (Log)"  # Add explicit shared legend name
)

# Total Long-Term Filings (Overall Risk)
map_total_risk <- ggplot(tract_map_monthly) +
  geom_sf(aes(fill = Total_Filings + 1), color = "white", linewidth = 0.1) +
  common_fill_scale +
  labs(
    title = "Total Eviction Filings (2020-2023)",
    subtitle = "Long-term risk and structural disparity."
  ) +
  coord_sf(datum = NA) +
  theme_void()

# Average Weekly Filings (Volatility/Short-Term Risk)
map_weekly_avg <- ggplot(tract_map_weekly) +
  geom_sf(aes(fill = Avg_Weekly_Filings + 1), color = "white", linewidth = 0.1) +
  common_fill_scale +
  labs(
    title = "Average Weekly Filings",
    subtitle = "Normalized view of short-term filing activity."
  ) +
  coord_sf(datum = NA) +
  theme_void()

# Map showing average monthly and weekly eviction filings.
(map_total_risk | map_weekly_avg) +
  plot_layout(guides = "collect") &
  theme(legend.position = "bottom", legend.direction = "horizontal")